Load Datasets
suppressMessages(library(data.table))
suppressMessages(library(readxl))
suppressMessages(library(dplyr))
suppressMessages(library(stringr))
suppressMessages(library(ggplot2))
suppressMessages(library(gclus))
suppressMessages(library(NbClust))
suppressMessages(library(fastDummies))
suppressMessages(library(cluster))
# Reproducibility, plot theme, and output location for this analysis.
set.seed(7)
theme_set(theme_bw())
viz_path <- "visualizations/RQ3"

# Metadata
coin_features <- as.data.frame(fread("datasets/coin_features.csv"))
ticker_info <- as.data.frame(fread("datasets/ticker_info.csv"))

# Prepare the clustering features: drop the ecosystem column, exclude three
# symbols, recode the logical mineability flag as 1/0, and one-hot encode the
# categorical consensus and hash columns (the source columns are removed).
coin_features <- coin_features %>%
  select(-ecosystem) %>%
  filter(!symbol %in% c("BTC-USD", "ETH-USD", "ZEN-USD")) %>%
  mutate(mineability = if_else(mineability, 1, 0)) %>%
  dummy_cols(
    select_columns = c("consensus", "hash"),
    remove_selected_columns = TRUE
  )
# NbClust indices that raise an error on this data and are therefore left out:
# ccc, scott, marriot, trcovw, tracew, friedman, rubin
indices <- c(
  "kl", "ch", "hartigan", "cindex", "db", "silhouette", "duda", "pseudot2",
  "beale", "ratkowsky", "ball", "ptbiserial", "gap", "frey", "mcclain",
  "gamma", "gplus", "tau", "dunn", "hubert", "sdindex", "dindex", "sdbw"
)

# Every column except the first one is used as a clustering feature.
cls.features <- select(coin_features, -1)

# Run NbClust once per index (complete linkage) and keep the first element of
# Best.nc, i.e. the number of clusters proposed by that index.
nc_list <- unlist(lapply(indices, function(idx) {
  NbClust(cls.features, method = "complete", index = idx)$Best.nc[1]
}))
Warning in max(res[, 25], na.rm = TRUE) :
no non-missing arguments to max; returning -Inf
Warning in matrix(c(results), nrow = 2, ncol = 30) :
data length [59] is not a sub-multiple or multiple of the number of rows [2]
Warning in matrix(c(results), nrow = 2, ncol = 30, dimnames = list(c("Number_clusters", :
data length [59] is not a sub-multiple or multiple of the number of rows [2]
Warning in min(k) : no non-missing arguments to min; returning Inf
Warning in max(k) : no non-missing arguments to max; returning -Inf
*** : The Hubert index is a graphical method of determining the number of clusters.
In the plot of Hubert index, we seek a significant knee that corresponds to a
significant increase of the value of the measure i.e the significant peak in Hubert
index second differences plot.
*** : The D index is a graphical method of determining the number of clusters.
In the plot of D index, we seek a significant knee (the significant peak in Dindex
second differences plot) that corresponds to a significant increase of the value of
the measure.
# Statistical mode of a vector.
#
# v: an atomic vector.
# Returns the most frequent value of `v`; when several values tie, the one
# that appears first in `v` is returned.
stat_mode <- function(v) {
  candidates <- unique(v)
  # Position of each element within `candidates`, then occurrences per position.
  positions <- match(v, candidates)
  counts <- tabulate(positions)
  candidates[which.max(counts)]
}
# Number of k-means clusters (fixed by hand; stat_mode(nc_list) would give the
# value most NbClust indices agree on).
nc <- 3 # stat_mode(nc_list)
# Number of groups cut from the dendrogram. The per-cluster filters C1..C6
# further down assume exactly six clusters, so keep them in sync with this.
cls.hclust.k <- 6

cls.kmeans <- kmeans(cls.features, centers = nc) # run k-mean clustering
cls.hclust <- hclust(
  dist(cls.features, method = "euclidean"),
  method = "complete"
) # run hierarchical clustering

# visualize clusters
# `labels` is clusplot's integer label-mode code (0-5), not a cluster count:
# it is pinned to 3 here so the plot does not change if `nc` is tuned.
clusplot(cls.features, cls.kmeans$cluster, color = TRUE, shade = TRUE, labels = 3)
plot(cls.hclust, labels = coin_features$symbol, cex = 0.8)
rect.hclust(cls.hclust, k = cls.hclust.k)
cls.hclust.cn <- cutree(cls.hclust, k = cls.hclust.k)
# pamd <- pam(dist(cls.features, method = "euclidean"), 6)
#
# sobj <- silhouette(pamd)
# plot(sobj, col=2:7)
library(tidyr)
# interpret each cluster
# One-column frame holding the hierarchical cluster id of every coin, in the
# same row order as coin_features / cls.features.
fcluster <- data.frame(cluster = cls.hclust.cn)

# Coin metadata with the cluster id attached as a factor.
coin_features.cluster <- bind_cols(list(coin_features, fcluster)) %>%
  mutate(cluster = as.factor(cluster))

# Mean of every feature per cluster, reshaped to long form (one row per
# cluster/trait pair). All non-cluster columns are pivoted, so the reshape
# keeps working when the number of dummy-encoded features changes.
cls.hclust.stats <- bind_cols(list(cls.features, fcluster)) %>%
  group_by(cluster) %>%
  summarise_all(list(mean)) %>%
  pivot_longer(cols = -cluster, names_to = "traits", values_to = "meanval") %>%
  mutate(cluster = as.factor(cluster))

cls.hclust.stats %>%
  ggplot(aes(x = traits, y = meanval, fill = cluster)) +
  geom_bar(stat = "identity", position = "dodge") +
  ggtitle("Average Features by Clusters") +
  xlab("") +
  ylab("Average Values") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# filter by each cluster
# These names are looked up later with get("C1"), ..., get("C6").
C1 <- coin_features.cluster %>% filter(cluster == "1")
C2 <- coin_features.cluster %>% filter(cluster == "2")
C3 <- coin_features.cluster %>% filter(cluster == "3")
C4 <- coin_features.cluster %>% filter(cluster == "4")
C5 <- coin_features.cluster %>% filter(cluster == "5")
C6 <- coin_features.cluster %>% filter(cluster == "6")
Pre-COVID19
# visualize coins' prices within cluster
# Window: 2019-01-01 to 2019-12-31. For every cluster this draws, per metric,
# a line plot and a notched boxplot of its coins, and stores the daily
# cluster average for the inter-cluster comparison.
price_cols <- c("Date", "Close", "Volume", "change", "returns", "volatility")
cluster.groups <- c("C1", "C2", "C3", "C4", "C5", "C6")
# Plotted metric -> column of the per-coin CSV it is read from.
metric.cols <- c(
  Prices = "Close", Changes = "change",
  Returns = "returns", Volatility = "volatility"
)
# Plotted metric -> wording used in the plot titles.
metric.titles <- c(
  Prices = "Price", Changes = "Change",
  Returns = "Returns", Volatility = "Volatility"
)

for (cluster.group in cluster.groups) {
  # Read all coins of the cluster into one long frame (one row per coin and
  # day). Values stay attached to their own dates, so no fixed number of rows
  # per coin is assumed and coins are never aligned by row position.
  cluster.values <- lapply(get(cluster.group)$symbol, function(symbol) {
    coin <- str_split(symbol, "-", simplify = TRUE)[1]
    fread(file.path("datasets/daily/coins", paste0(symbol, ".csv"))) %>%
      as.data.frame() %>%
      filter(Date >= "2019-01-01" & Date <= "2019-12-31") %>%
      select(all_of(price_cols)) %>%
      arrange(Date) %>%
      mutate(Coins = coin)
  }) %>%
    bind_rows() %>%
    # keep the coins in reading order in legends and on the boxplot axis
    mutate(Coins = factor(Coins, levels = unique(Coins)))

  for (metric in names(metric.cols)) {
    metric.lower <- tolower(metric)
    metric.melt <- data.frame(
      Date = cluster.values$Date,
      Coins = cluster.values$Coins,
      value = cluster.values[[metric.cols[[metric]]]]
    )

    metric.plot <- ggplot(metric.melt, aes(x = Date, y = value, color = Coins)) +
      ggtitle(paste(metric.titles[[metric]], "Movement of", cluster.group)) +
      ylab(metric) +
      geom_line()
    metric.boxplot <- ggplot(metric.melt, aes(x = Coins, y = value, fill = Coins)) +
      geom_boxplot(notch = TRUE) +
      ggtitle(paste(metric.titles[[metric]], "Boxplot of", cluster.group)) +
      ylab(metric) +
      theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
    print(metric.plot)
    print(metric.boxplot)

    # average prices for inter-cluster comparison
    metric.avg <- metric.melt %>%
      group_by(Date) %>%
      summarise(avg = mean(value, na.rm = TRUE)) %>%
      mutate(cluster = cluster.group)
    names(metric.avg)[2] <- paste("avg", metric.lower, sep = "_")

    # The inter-cluster plotting code looks these objects up by name:
    # <cluster>.<metric>.avg for each cluster, and coins.<metric>.avg, which
    # ends up holding the last cluster processed.
    assign(paste(cluster.group, metric.lower, "avg", sep = "."), metric.avg)
    assign(paste("coins", metric.lower, "avg", sep = "."), metric.avg)
  }
}
Warning: Removed 1832 row(s) containing missing values (geom_path).
Warning: Removed 1832 rows containing non-finite values (stat_boxplot).
notch went outside hinges. Try setting notch=FALSE.
Warning: Removed 1834 row(s) containing missing values (geom_path).
Warning: Removed 1834 rows containing non-finite values (stat_boxplot).
Warning: Removed 1834 row(s) containing missing values (geom_path).
Warning: Removed 1834 rows containing non-finite values (stat_boxplot).
Warning: Removed 1830 row(s) containing missing values (geom_path).
Warning: Removed 1830 rows containing non-finite values (stat_boxplot).
Warning: Removed 2205 row(s) containing missing values (geom_path).
Warning: Removed 2205 rows containing non-finite values (stat_boxplot).
notch went outside hinges. Try setting notch=FALSE.
Warning: Removed 2210 row(s) containing missing values (geom_path).
Warning: Removed 2210 rows containing non-finite values (stat_boxplot).
notch went outside hinges. Try setting notch=FALSE.
Warning: Removed 2210 row(s) containing missing values (geom_path).
Warning: Removed 2210 rows containing non-finite values (stat_boxplot).
notch went outside hinges. Try setting notch=FALSE.
Warning: Removed 2200 row(s) containing missing values (geom_path).
Warning: Removed 2200 rows containing non-finite values (stat_boxplot).
notch went outside hinges. Try setting notch=FALSE.
Warning: Removed 1645 row(s) containing missing values (geom_path).
Warning: Removed 1645 rows containing non-finite values (stat_boxplot).
Warning: Removed 1648 row(s) containing missing values (geom_path).
Warning: Removed 1648 rows containing non-finite values (stat_boxplot).
Warning: Removed 1648 row(s) containing missing values (geom_path).
Warning: Removed 1648 rows containing non-finite values (stat_boxplot).
Warning: Removed 1642 row(s) containing missing values (geom_path).
Warning: Removed 1642 rows containing non-finite values (stat_boxplot).
Warning: Removed 3383 row(s) containing missing values (geom_path).
Warning: Removed 3383 rows containing non-finite values (stat_boxplot).
Warning: Removed 3386 row(s) containing missing values (geom_path).
Warning: Removed 3386 rows containing non-finite values (stat_boxplot).
Warning: Removed 3386 row(s) containing missing values (geom_path).
Warning: Removed 3386 rows containing non-finite values (stat_boxplot).
Warning: Removed 3380 row(s) containing missing values (geom_path).
Warning: Removed 3380 rows containing non-finite values (stat_boxplot).
Warning: Removed 1095 row(s) containing missing values (geom_path).
Warning: Removed 1095 rows containing non-finite values (stat_boxplot).
Warning: Removed 1095 row(s) containing missing values (geom_path).
Warning: Removed 1095 rows containing non-finite values (stat_boxplot).
Warning: Removed 1095 row(s) containing missing values (geom_path).
Warning: Removed 1095 rows containing non-finite values (stat_boxplot).
Warning: Removed 1095 row(s) containing missing values (geom_path).
Warning: Removed 1095 rows containing non-finite values (stat_boxplot).
Warning: Removed 3934 row(s) containing missing values (geom_path).
Warning: Removed 3934 rows containing non-finite values (stat_boxplot).
Warning: Removed 3937 row(s) containing missing values (geom_path).
Warning: Removed 3937 rows containing non-finite values (stat_boxplot).
Warning: Removed 3937 row(s) containing missing values (geom_path).
Warning: Removed 3937 rows containing non-finite values (stat_boxplot).
Warning: Removed 3931 row(s) containing missing values (geom_path).
Warning: Removed 3931 rows containing non-finite values (stat_boxplot).
# inter-cluster prices
# For each metric, compare the daily cluster averages across clusters with a
# line plot and a notched boxplot. The per-cluster frames are the objects
# named <cluster>.<metric>.avg created by the per-cluster loop.
for (movement in c("Prices", "Changes", "Returns", "Volatility")) {
  movement.lower <- tolower(movement)
  y_val <- paste("avg", movement.lower, sep = "_")

  # Stack the per-cluster averages into one frame; its `cluster` column
  # drives colour/fill, so the set of clusters comes from cluster.groups
  # instead of one hand-written layer per cluster.
  cluster.avgs <- bind_rows(
    mget(paste(cluster.groups, movement.lower, "avg", sep = "."))
  )

  cluster.plot <- ggplot(
    cluster.avgs,
    aes(x = Date, y = .data[[y_val]], color = cluster)
  ) +
    geom_line() +
    ylab(paste("Average", movement)) +
    labs(color = "Cluster") +
    ggtitle(paste(movement, "Movement between Clusters"))

  cluster.boxplot <- ggplot(
    cluster.avgs,
    aes(x = cluster, y = .data[[y_val]], fill = cluster)
  ) +
    geom_boxplot(notch = TRUE) +
    ylab(movement) +
    labs(fill = "Cluster") +
    ggtitle(paste(movement, "Boxplot between Clusters"))

  print(cluster.plot)
  print(cluster.boxplot)
}
NA
NA
Peri-COVID19
# visualize coins' prices within cluster
# Window: 2020-01-01 to 2020-12-31. For every cluster this draws, per metric,
# a line plot and a notched boxplot of its coins, and stores the daily
# cluster average for the inter-cluster comparison.
price_cols <- c("Date", "Close", "Volume", "change", "returns", "volatility")
cluster.groups <- c("C1", "C2", "C3", "C4", "C5", "C6")
# Plotted metric -> column of the per-coin CSV it is read from.
metric.cols <- c(
  Prices = "Close", Changes = "change",
  Returns = "returns", Volatility = "volatility"
)
# Plotted metric -> wording used in the plot titles.
metric.titles <- c(
  Prices = "Price", Changes = "Change",
  Returns = "Returns", Volatility = "Volatility"
)

for (cluster.group in cluster.groups) {
  # Read all coins of the cluster into one long frame (one row per coin and
  # day). Values stay attached to their own dates, so no fixed number of rows
  # per coin is assumed and coins are never aligned by row position.
  cluster.values <- lapply(get(cluster.group)$symbol, function(symbol) {
    coin <- str_split(symbol, "-", simplify = TRUE)[1]
    fread(file.path("datasets/daily/coins", paste0(symbol, ".csv"))) %>%
      as.data.frame() %>%
      filter(Date >= "2020-01-01" & Date <= "2020-12-31") %>%
      select(all_of(price_cols)) %>%
      arrange(Date) %>%
      mutate(Coins = coin)
  }) %>%
    bind_rows() %>%
    # keep the coins in reading order in legends and on the boxplot axis
    mutate(Coins = factor(Coins, levels = unique(Coins)))

  for (metric in names(metric.cols)) {
    metric.lower <- tolower(metric)
    metric.melt <- data.frame(
      Date = cluster.values$Date,
      Coins = cluster.values$Coins,
      value = cluster.values[[metric.cols[[metric]]]]
    )

    metric.plot <- ggplot(metric.melt, aes(x = Date, y = value, color = Coins)) +
      ggtitle(paste(metric.titles[[metric]], "Movement of", cluster.group)) +
      ylab(metric) +
      geom_line()
    metric.boxplot <- ggplot(metric.melt, aes(x = Coins, y = value, fill = Coins)) +
      geom_boxplot(notch = TRUE) +
      ggtitle(paste(metric.titles[[metric]], "Boxplot of", cluster.group)) +
      ylab(metric) +
      theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
    print(metric.plot)
    print(metric.boxplot)

    # average prices for inter-cluster comparison
    metric.avg <- metric.melt %>%
      group_by(Date) %>%
      summarise(avg = mean(value, na.rm = TRUE)) %>%
      mutate(cluster = cluster.group)
    names(metric.avg)[2] <- paste("avg", metric.lower, sep = "_")

    # The inter-cluster plotting code looks these objects up by name:
    # <cluster>.<metric>.avg for each cluster, and coins.<metric>.avg, which
    # ends up holding the last cluster processed.
    assign(paste(cluster.group, metric.lower, "avg", sep = "."), metric.avg)
    assign(paste("coins", metric.lower, "avg", sep = "."), metric.avg)
  }
}
Warning: Removed 968 row(s) containing missing values (geom_path).
Warning: Removed 1095 rows containing non-finite values (stat_boxplot).
Warning: Removed 976 row(s) containing missing values (geom_path).
Warning: Removed 1117 rows containing non-finite values (stat_boxplot).
Warning: Removed 976 row(s) containing missing values (geom_path).
Warning: Removed 1117 rows containing non-finite values (stat_boxplot).
Warning: Removed 964 row(s) containing missing values (geom_path).
Warning: Removed 1053 rows containing non-finite values (stat_boxplot).
Warning: Removed 650 row(s) containing missing values (geom_path).
Warning: Removed 793 rows containing non-finite values (stat_boxplot).
Warning: Removed 652 row(s) containing missing values (geom_path).
Warning: Removed 813 rows containing non-finite values (stat_boxplot).
Warning: Removed 652 row(s) containing missing values (geom_path).
Warning: Removed 813 rows containing non-finite values (stat_boxplot).
Warning: Removed 646 row(s) containing missing values (geom_path).
Warning: Removed 753 rows containing non-finite values (stat_boxplot).
Warning: Removed 1112 row(s) containing missing values (geom_path).
Warning: Removed 1291 rows containing non-finite values (stat_boxplot).
Warning: Removed 1184 row(s) containing missing values (geom_path).
Warning: Removed 1311 rows containing non-finite values (stat_boxplot).
Warning: Removed 1184 row(s) containing missing values (geom_path).
Warning: Removed 1311 rows containing non-finite values (stat_boxplot).
Warning: Removed 1108 row(s) containing missing values (geom_path).
Warning: Removed 1249 rows containing non-finite values (stat_boxplot).
Warning: Removed 2688 row(s) containing missing values (geom_path).
Warning: Removed 2953 rows containing non-finite values (stat_boxplot).
Warning: Removed 2710 row(s) containing missing values (geom_path).
Warning: Removed 2983 rows containing non-finite values (stat_boxplot).
Warning: Removed 2794 row(s) containing missing values (geom_path).
Warning: Removed 3040 rows containing non-finite values (stat_boxplot).
Warning: Removed 2677 row(s) containing missing values (geom_path).
Warning: Removed 2890 rows containing non-finite values (stat_boxplot).
Warning: Removed 478 row(s) containing missing values (geom_path).
Warning: Removed 570 rows containing non-finite values (stat_boxplot).
notch went outside hinges. Try setting notch=FALSE.
Warning: Removed 481 row(s) containing missing values (geom_path).
Warning: Removed 586 rows containing non-finite values (stat_boxplot).
Warning: Removed 481 row(s) containing missing values (geom_path).
Warning: Removed 586 rows containing non-finite values (stat_boxplot).
Warning: Removed 474 row(s) containing missing values (geom_path).
Warning: Removed 540 rows containing non-finite values (stat_boxplot).
Warning: Removed 2967 row(s) containing missing values (geom_path).
Warning: Removed 3168 rows containing non-finite values (stat_boxplot).
notch went outside hinges. Try setting notch=FALSE.
notch went outside hinges. Try setting notch=FALSE.
notch went outside hinges. Try setting notch=FALSE.
notch went outside hinges. Try setting notch=FALSE.
Warning: Removed 2976 row(s) containing missing values (geom_path).
Warning: Removed 3205 rows containing non-finite values (stat_boxplot).
Warning: Removed 2976 row(s) containing missing values (geom_path).
Warning: Removed 3205 rows containing non-finite values (stat_boxplot).
Warning: Removed 2944 row(s) containing missing values (geom_path).
Warning: Removed 3089 rows containing non-finite values (stat_boxplot).
notch went outside hinges. Try setting notch=FALSE.
# inter-cluster prices
# For each metric, compare the daily cluster averages across clusters with a
# line plot and a notched boxplot. The per-cluster frames are the objects
# named <cluster>.<metric>.avg created by the per-cluster loop.
for (movement in c("Prices", "Changes", "Returns", "Volatility")) {
  movement.lower <- tolower(movement)
  y_val <- paste("avg", movement.lower, sep = "_")

  # Stack the per-cluster averages into one frame; its `cluster` column
  # drives colour/fill, so the set of clusters comes from cluster.groups
  # instead of one hand-written layer per cluster.
  cluster.avgs <- bind_rows(
    mget(paste(cluster.groups, movement.lower, "avg", sep = "."))
  )

  cluster.plot <- ggplot(
    cluster.avgs,
    aes(x = Date, y = .data[[y_val]], color = cluster)
  ) +
    geom_line() +
    ylab(paste("Average", movement)) +
    labs(color = "Cluster") +
    ggtitle(paste(movement, "Movement between Clusters"))

  cluster.boxplot <- ggplot(
    cluster.avgs,
    aes(x = cluster, y = .data[[y_val]], fill = cluster)
  ) +
    geom_boxplot(notch = TRUE) +
    ylab(movement) +
    labs(fill = "Cluster") +
    ggtitle(paste(movement, "Boxplot between Clusters"))

  print(cluster.plot)
  print(cluster.boxplot)
}
Post-COVID19
# visualize coins' prices within cluster
# Window: 2021-01-01 onwards (no upper bound). For every cluster this draws,
# per metric, a line plot and a notched boxplot of its coins, and stores the
# daily cluster average for the inter-cluster comparison.
price_cols <- c("Date", "Close", "Volume", "change", "returns", "volatility")
cluster.groups <- c("C1", "C2", "C3", "C4", "C5", "C6")
# Plotted metric -> column of the per-coin CSV it is read from.
metric.cols <- c(
  Prices = "Close", Changes = "change",
  Returns = "returns", Volatility = "volatility"
)
# Plotted metric -> wording used in the plot titles.
metric.titles <- c(
  Prices = "Price", Changes = "Change",
  Returns = "Returns", Volatility = "Volatility"
)

for (cluster.group in cluster.groups) {
  # Read all coins of the cluster into one long frame (one row per coin and
  # day). Values stay attached to their own dates, so no fixed number of rows
  # per coin is assumed and coins are never aligned by row position.
  cluster.values <- lapply(get(cluster.group)$symbol, function(symbol) {
    coin <- str_split(symbol, "-", simplify = TRUE)[1]
    fread(file.path("datasets/daily/coins", paste0(symbol, ".csv"))) %>%
      as.data.frame() %>%
      filter(Date >= "2021-01-01") %>%
      select(all_of(price_cols)) %>%
      arrange(Date) %>%
      mutate(Coins = coin)
  }) %>%
    bind_rows() %>%
    # keep the coins in reading order in legends and on the boxplot axis
    mutate(Coins = factor(Coins, levels = unique(Coins)))

  for (metric in names(metric.cols)) {
    metric.lower <- tolower(metric)
    metric.melt <- data.frame(
      Date = cluster.values$Date,
      Coins = cluster.values$Coins,
      value = cluster.values[[metric.cols[[metric]]]]
    )

    metric.plot <- ggplot(metric.melt, aes(x = Date, y = value, color = Coins)) +
      ggtitle(paste(metric.titles[[metric]], "Movement of", cluster.group)) +
      ylab(metric) +
      geom_line()
    metric.boxplot <- ggplot(metric.melt, aes(x = Coins, y = value, fill = Coins)) +
      geom_boxplot(notch = TRUE) +
      ggtitle(paste(metric.titles[[metric]], "Boxplot of", cluster.group)) +
      ylab(metric) +
      theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
    print(metric.plot)
    print(metric.boxplot)

    # average prices for inter-cluster comparison
    metric.avg <- metric.melt %>%
      group_by(Date) %>%
      summarise(avg = mean(value, na.rm = TRUE)) %>%
      mutate(cluster = cluster.group)
    names(metric.avg)[2] <- paste("avg", metric.lower, sep = "_")

    # The inter-cluster plotting code looks these objects up by name:
    # <cluster>.<metric>.avg for each cluster, and coins.<metric>.avg, which
    # ends up holding the last cluster processed.
    assign(paste(cluster.group, metric.lower, "avg", sep = "."), metric.avg)
    assign(paste("coins", metric.lower, "avg", sep = "."), metric.avg)
  }
}
Warning: Removed 360 row(s) containing missing values (geom_path).
Warning: Removed 360 rows containing non-finite values (stat_boxplot).
Warning: Removed 362 row(s) containing missing values (geom_path).
Warning: Removed 362 rows containing non-finite values (stat_boxplot).
Warning: Removed 362 row(s) containing missing values (geom_path).
Warning: Removed 362 rows containing non-finite values (stat_boxplot).
notch went outside hinges. Try setting notch=FALSE.
Warning: Removed 358 row(s) containing missing values (geom_path).
Warning: Removed 358 rows containing non-finite values (stat_boxplot).
Warning: Removed 180 row(s) containing missing values (geom_path).
Warning: Removed 180 rows containing non-finite values (stat_boxplot).
Warning: Removed 182 row(s) containing missing values (geom_path).
Warning: Removed 182 rows containing non-finite values (stat_boxplot).
Warning: Removed 182 row(s) containing missing values (geom_path).
Warning: Removed 182 rows containing non-finite values (stat_boxplot).
Warning: Removed 178 row(s) containing missing values (geom_path).
Warning: Removed 178 rows containing non-finite values (stat_boxplot).
Warning: Removed 856 row(s) containing missing values (geom_path).
Warning: Removed 898 rows containing non-finite values (stat_boxplot).
notch went outside hinges. Try setting notch=FALSE.
Warning: Removed 861 row(s) containing missing values (geom_path).
Warning: Removed 904 rows containing non-finite values (stat_boxplot).
Warning: Removed 861 row(s) containing missing values (geom_path).
Warning: Removed 904 rows containing non-finite values (stat_boxplot).
Warning: Removed 851 row(s) containing missing values (geom_path).
Warning: Removed 891 rows containing non-finite values (stat_boxplot).
Warning: Removed 1802 row(s) containing missing values (geom_path).
Warning: Removed 1802 rows containing non-finite values (stat_boxplot).
notch went outside hinges. Try setting notch=FALSE.
Warning: Removed 1813 row(s) containing missing values (geom_path).
Warning: Removed 1813 rows containing non-finite values (stat_boxplot).
notch went outside hinges. Try setting notch=FALSE.
Warning: Removed 1813 row(s) containing missing values (geom_path).
Warning: Removed 1813 rows containing non-finite values (stat_boxplot).
notch went outside hinges. Try setting notch=FALSE.
Warning: Removed 1791 row(s) containing missing values (geom_path).
Warning: Removed 1791 rows containing non-finite values (stat_boxplot).
notch went outside hinges. Try setting notch=FALSE.
Warning: Removed 90 row(s) containing missing values (geom_path).
Warning: Removed 90 rows containing non-finite values (stat_boxplot).
notch went outside hinges. Try setting notch=FALSE.
Warning: Removed 91 row(s) containing missing values (geom_path).
Warning: Removed 91 rows containing non-finite values (stat_boxplot).
Warning: Removed 91 row(s) containing missing values (geom_path).
Warning: Removed 91 rows containing non-finite values (stat_boxplot).
Warning: Removed 89 row(s) containing missing values (geom_path).
Warning: Removed 89 rows containing non-finite values (stat_boxplot).
Warning: Removed 1713 row(s) containing missing values (geom_path).
Warning: Removed 1713 rows containing non-finite values (stat_boxplot).
Warning: Removed 1727 row(s) containing missing values (geom_path).
Warning: Removed 1727 rows containing non-finite values (stat_boxplot).
Warning: Removed 1727 row(s) containing missing values (geom_path).
Warning: Removed 1727 rows containing non-finite values (stat_boxplot).
Warning: Removed 1699 row(s) containing missing values (geom_path).
Warning: Removed 1699 rows containing non-finite values (stat_boxplot).
# inter-cluster prices
# For each metric, compare the daily cluster averages across clusters with a
# line plot and a notched boxplot. The per-cluster frames are the objects
# named <cluster>.<metric>.avg created by the per-cluster loop.
for (movement in c("Prices", "Changes", "Returns", "Volatility")) {
  movement.lower <- tolower(movement)
  y_val <- paste("avg", movement.lower, sep = "_")

  # Stack the per-cluster averages into one frame; its `cluster` column
  # drives colour/fill, so the set of clusters comes from cluster.groups
  # instead of one hand-written layer per cluster.
  cluster.avgs <- bind_rows(
    mget(paste(cluster.groups, movement.lower, "avg", sep = "."))
  )

  cluster.plot <- ggplot(
    cluster.avgs,
    aes(x = Date, y = .data[[y_val]], color = cluster)
  ) +
    geom_line() +
    ylab(paste("Average", movement)) +
    labs(color = "Cluster") +
    ggtitle(paste(movement, "Movement between Clusters"))

  cluster.boxplot <- ggplot(
    cluster.avgs,
    aes(x = cluster, y = .data[[y_val]], fill = cluster)
  ) +
    geom_boxplot(notch = TRUE) +
    ylab(movement) +
    labs(fill = "Cluster") +
    ggtitle(paste(movement, "Boxplot between Clusters"))

  print(cluster.plot)
  print(cluster.boxplot)
}